/*
 * Copyright 2017 - 2024 the original author or authors.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see [https://www.gnu.org/licenses/]
 */
package infra.mock.api.fileupload.util.mime;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Utility class to decode MIME texts.
 *
 * <p>Decodes RFC 2047 "encoded-word" tokens ({@code =?charset?encoding?encoded-text?=})
 * found in header values. Both the Base64 ({@code B}) and the quoted-printable
 * ({@code Q}) encodings are handled.
 *
 * @since FileUpload 1.3
 */
public final class MimeUtility {

  /**
   * The marker to indicate text is encoded with BASE64 algorithm.
   */
  private static final String BASE64_ENCODING_MARKER = "B";

  /**
   * The marker to indicate text is encoded with QuotedPrintable algorithm.
   */
  private static final String QUOTEDPRINTABLE_ENCODING_MARKER = "Q";

  /**
   * If the text contains any encoded tokens, those tokens will be marked with "=?".
   */
  private static final String ENCODED_TOKEN_MARKER = "=?";

  /**
   * If the text contains any encoded tokens, those tokens will terminate with "?=".
   */
  private static final String ENCODED_TOKEN_FINISHER = "?=";

  /**
   * The linear whitespace chars sequence.
   */
  private static final String LINEAR_WHITESPACE = " \t\r\n";

  /**
   * Mappings between MIME and Java charset. Keys are lower-case MIME names;
   * the map is only populated by the static initializer below.
   */
  private static final Map<String, String> MIME2JAVA = new HashMap<>();

  static {
    MIME2JAVA.put("iso-2022-cn", "ISO2022CN");
    MIME2JAVA.put("iso-2022-kr", "ISO2022KR");
    MIME2JAVA.put("utf-8", "UTF8");
    MIME2JAVA.put("utf8", "UTF8");
    MIME2JAVA.put("ja_jp.iso2022-7", "ISO2022JP");
    MIME2JAVA.put("ja_jp.eucjp", "EUCJIS");
    MIME2JAVA.put("euc-kr", "KSC5601");
    MIME2JAVA.put("euckr", "KSC5601");
    MIME2JAVA.put("us-ascii", "ISO-8859-1");
    MIME2JAVA.put("x-us-ascii", "ISO-8859-1");
  }

  /**
   * Hidden constructor, this class must not be instantiated.
   */
  private MimeUtility() {
    // do nothing
  }

  /**
   * Decode a string of text obtained from a mail header into
   * its proper form.  The text generally will consist of a
   * string of whitespace-separated tokens, some of which may be
   * RFC 2047 encoded-words using the base64 or quoted-printable encoding.
   *
   * <p>Whitespace between two adjacent encoded-words is dropped, whitespace
   * next to a plain token is kept, and trailing whitespace is discarded.
   * A token that starts with "=?" but cannot be parsed as an encoded-word
   * is copied to the result unchanged.
   *
   * @param text The text to decode.
   * @return The decoded text string.
   * @throws UnsupportedEncodingException if the detected encoding in the input text is not supported
   * or the encoded payload is malformed.
   */
  public static String decodeText(final String text) throws UnsupportedEncodingException {
    // if the text contains any encoded tokens, those tokens will be marked with "=?".  If the
    // source string doesn't contain that sequence, no decoding is required.
    if (!text.contains(ENCODED_TOKEN_MARKER)) {
      return text;
    }

    int offset = 0;
    final int endOffset = text.length();

    // bounds of the most recently scanned whitespace run; startWhiteSpace == -1 means
    // there is no pending whitespace waiting to be emitted.
    int startWhiteSpace = -1;
    int endWhiteSpace = -1;

    final StringBuilder decodedText = new StringBuilder(text.length());

    boolean previousTokenEncoded = false;

    while (offset < endOffset) {
      if (isLinearWhitespace(text.charAt(offset))) {
        // remember where the run starts and step over it.
        startWhiteSpace = offset;
        while (offset < endOffset) {
          if (!isLinearWhitespace(text.charAt(offset))) {
            // record the location of the first non lwsp; the next iteration of the
            // outer loop processes the token characters.
            endWhiteSpace = offset;
            break;
          }
          offset++;
        }
        // NB: a run that reaches the end of the text never sets endWhiteSpace and is
        // never appended, i.e. trailing whitespace is discarded.
      }
      else {
        // we have a word token.  Scan over the word and then try to parse it.
        final int wordStart = offset;
        while (offset < endOffset && !isLinearWhitespace(text.charAt(offset))) {
          offset++;
        }
        final String word = text.substring(wordStart, offset);

        // is the token encoded?  decode the word
        if (word.startsWith(ENCODED_TOKEN_MARKER)) {
          try {
            // if this gives a parsing failure, treat it like a non-encoded word.
            final String decodedWord = decodeWord(word);

            // whitespace is only significant when the previous token was not an
            // encoded-word; between two encoded-words it is dropped.
            if (!previousTokenEncoded && startWhiteSpace != -1) {
              decodedText.append(text, startWhiteSpace, endWhiteSpace);
              startWhiteSpace = -1;
            }
            // this is definitely a decoded token.
            previousTokenEncoded = true;
            decodedText.append(decodedWord);
            continue;
          }
          catch (final ParseException ignored) {
            // not a well-formed encoded-word: fall through and copy it as normal text.
          }
        }
        // this is a normal token, so it doesn't matter what the previous token was.
        // Add the white space if we have it.
        if (startWhiteSpace != -1) {
          decodedText.append(text, startWhiteSpace, endWhiteSpace);
          startWhiteSpace = -1;
        }
        // this is not a decoded token.
        previousTokenEncoded = false;
        decodedText.append(word);
      }
    }

    return decodedText.toString();
  }

  /**
   * Tell whether the given character is one of the linear whitespace characters
   * (space, tab, carriage return, line feed).
   *
   * @param ch The character to test.
   * @return {@code true} if the character is linear whitespace.
   */
  private static boolean isLinearWhitespace(final char ch) {
    return LINEAR_WHITESPACE.indexOf(ch) != -1;
  }

  /**
   * Parse a string using the RFC 2047 rules for an "encoded-word"
   * type.  This encoding has the syntax:
   *
   * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
   *
   * <p>The encoding marker is matched case-insensitively. Any characters
   * following the terminating "?=" inside the word are ignored.
   *
   * @param word The possibly encoded word value.
   * @return The decoded word; the empty string when the encoded text is empty.
   * @throws ParseException in case the word does not follow the RFC 2047 encoded-word syntax
   * @throws UnsupportedEncodingException when the encoding marker or the charset is unknown,
   * or when the encoded text cannot be decoded
   */
  private static String decodeWord(final String word) throws ParseException, UnsupportedEncodingException {
    // encoded words start with the characters "=?".  If this not an encoded word, we throw a
    // ParseException for the caller.
    if (!word.startsWith(ENCODED_TOKEN_MARKER)) {
      throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
    }

    final int charsetPos = word.indexOf('?', 2);
    if (charsetPos == -1) {
      throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
    }

    // pull out the character set information (this is the MIME name at this point).
    final String charset = word.substring(2, charsetPos).toLowerCase(Locale.ENGLISH);

    // now pull out the encoding token the same way.
    final int encodingPos = word.indexOf('?', charsetPos + 1);
    if (encodingPos == -1) {
      throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
    }

    final String encoding = word.substring(charsetPos + 1, encodingPos);

    // and finally the encoded text.
    final int encodedTextPos = word.indexOf(ENCODED_TOKEN_FINISHER, encodingPos + 1);
    if (encodedTextPos == -1) {
      throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
    }

    final String encodedText = word.substring(encodingPos + 1, encodedTextPos);

    // seems a bit silly to encode a null string, but easy to deal with.
    if (encodedText.isEmpty()) {
      return "";
    }

    try {
      final byte[] decodedData;
      // RFC 2047 declares the encoding name case-independent, so accept "b"/"q" as well.
      if (BASE64_ENCODING_MARKER.equalsIgnoreCase(encoding)) {
        decodedData = Base64.getDecoder().decode(encodedText);
      }
      else if (QUOTEDPRINTABLE_ENCODING_MARKER.equalsIgnoreCase(encoding)) {
        // the quoted-printable decoder writes directly to an output stream.
        final ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
        final byte[] encodedData = encodedText.getBytes(StandardCharsets.US_ASCII);
        QuotedPrintableDecoder.decode(encodedData, out);
        decodedData = out.toByteArray();
      }
      else {
        throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
      }
      // Convert decoded byte data into a string.
      return new String(decodedData, javaCharset(charset));
    }
    catch (final UnsupportedEncodingException e) {
      // UnsupportedEncodingException is an IOException; rethrow it as is so the specific
      // message (unknown encoding / unknown charset) is not replaced by the generic one below.
      throw e;
    }
    catch (final IOException | IllegalArgumentException e) {
      // IllegalArgumentException is what java.util.Base64 throws for malformed input; report
      // it through the declared checked exception instead of letting it escape unchecked.
      final UnsupportedEncodingException failure = new UnsupportedEncodingException("Invalid RFC 2047 encoding");
      failure.initCause(e);
      throw failure;
    }
  }

  /**
   * Translate a MIME standard character set name into the Java
   * equivalent.
   *
   * @param charset The MIME standard name.
   * @return The Java equivalent for this name, the given name itself when no
   * mapping is registered, or {@code null} when the given name is {@code null}.
   */
  private static String javaCharset(final String charset) {
    // nothing in, nothing out.
    if (charset == null) {
      return null;
    }

    final String mappedCharset = MIME2JAVA.get(charset.toLowerCase(Locale.ENGLISH));
    // if there is no mapping, then the original name is used.  Many of the MIME character set
    // names map directly back into Java.  The reverse isn't necessarily true.
    if (mappedCharset == null) {
      return charset;
    }
    return mappedCharset;
  }

}
